library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
library(usmap)
library(lmtest)
library(pracma)
library(lmtest)
library(forecast)
library(vars)
library(rvest)
library(RSelenium)
library(seleniumPipes)
library(dLagM)
library(jsonlite)
library(rgdal)

options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

Get the cumulative case data, first for SCC.

# remDr <- remoteDriver(
#   remoteServerAddr = "192.168.86.25",
#   port = 4445L
# )
# remDr$open()
# 
# remDr$navigate("https://app.powerbigov.us/view?r=eyJrIjoiZTg2MTlhMWQtZWE5OC00ZDI3LWE4NjAtMTU3YWYwZDRlOTNmIiwidCI6IjBhYzMyMDJmLWMzZTktNGY1Ni04MzBkLTAxN2QwOWQxNmIzZiJ9")
# 
# webElem <- remDr$findElements(using = "class", value = "column")
# 
# cases <-
#   1:length(webElem) %>%
#   map(function(x){
#     webElem[[x]]$getElementAttribute("aria-label") %>% as.character()
#   }) %>%
#   unlist() %>%
#   as.data.frame()
# 
# scc_cumulative_cases <-
#   cases %>%
#   rename(text = ".") %>%
#   filter(grepl("Total_cases",text)) %>%
#   separate(text, c("date","cases"), sep = "\\.") %>%
#   mutate(
#     date =
#       substr(date,6,nchar(.)) %>%
#       as.Date("%A, %B %d, %Y"),
#     cases =
#       substr(cases,13,nchar(.)) %>%
#       as.numeric()
#   )
# 
# saveRDS(scc_cumulative_cases, file = "/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/scc_cumulative_cases.rds")

scc_cumulative_cases <- readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/scc_cumulative_cases.rds")

Also for SMC.

# remDr$navigate("https://app.powerbigov.us/view?r=eyJrIjoiMWI5NmE5M2ItOTUwMC00NGNmLWEzY2UtOTQyODA1YjQ1NWNlIiwidCI6IjBkZmFmNjM1LWEwNGQtNDhjYy1hN2UzLTZkYTFhZjA4ODNmOSJ9")
# 
# webElem <- remDr$findElements(using = "class", value = "column")
# 
# tests <-
#   1:length(webElem) %>%
#   map(function(x){
#     webElem[[x]]$getElementAttribute("aria-label") %>% as.character()
#   }) %>%
#   unlist() %>%
#   as.data.frame()
# 
# tests_clean <-
#   tests %>%
#   rename(text = ".") %>%
#   separate(text, c("date","test_text"), sep = "\\.") %>%
#   separate(test_text, c(NA,"type",NA,"tests")) %>%
#   mutate(
#     date =
#       substr(date,23,nchar(.)) %>%
#       as.Date("%A, %B %d, %Y"),
#     tests =
#       tests %>%
#       as.numeric()
#   ) %>%
#   spread(
#     key = type,
#     value = tests
#   ) %>%
#   mutate(
#     total = Negative + Pending + Positive,
#     perc_positive = Positive/total,
#     perc_positive_movavg = movavg(perc_positive, 7, type = "s")
#   )
# 
# smc_cumulative_cases <- tests_clean %>%
#   mutate(cumulative_cases = cumsum(Positive), cumulative_negative = cumsum(Negative), cumulative_total = cumulative_cases+cumulative_negative, perc_positive_cumulative = cumulative_cases*100 / cumulative_total, perc_positive_cumulative_mov = movavg(perc_positive_cumulative, 7, type = "s"))
# 
# saveRDS(smc_cumulative_cases, file = "/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/smc_cumulative_cases.rds")

smc_cumulative_cases <- readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/smc_cumulative_cases.rds")

Get social distancing data.

scc_blockgroups <-
  block_groups("CA","Santa Clara", cb=F, progress_bar=F) %>% 
  st_transform('+proj=longlat +datum=WGS84')

smc_blockgroups <-
  block_groups("CA","San Mateo", cb=F, progress_bar=F) %>% 
  st_transform('+proj=longlat +datum=WGS84')

bay_sd <- readRDS("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/bay_socialdistancing_v2.rds") %>% 
  mutate(date = date_range_start %>%  substr(1,10) %>% as.Date())

# obtaining weekends
weekends <- bay_sd %>% 
  filter(!duplicated(date)) %>% 
  arrange(date) %>% 
  mutate(weekend = ifelse((date %>% as.numeric()) %% 7 %in% c(2,3), T, F)) %>% 
  dplyr::select(date,weekend)

bay_sd <- bay_sd %>% left_join(weekends)

SCC data processing.

scc_cases_sd_daily <- bay_sd %>% 
  filter(origin_census_block_group %in% scc_blockgroups$GEOID) %>%
  group_by(date) %>%
  summarize(total_at_home = sum(completely_home_device_count), total_devices = sum(device_count)) %>%
  mutate(
    percent_at_home = total_at_home*100/total_devices, 
    percent_leaving_home = (100 - percent_at_home),
  ) %>% 
  left_join(
    scc_cumulative_cases
  ) %>% 
  filter(date >= min(scc_cumulative_cases$date))

# get the baseline percent of people at home
pre_case_growth_at_home_scc <- bay_sd %>%
  filter(date < min(scc_cumulative_cases$date)) %>%
  filter(origin_census_block_group %in% scc_blockgroups$GEOID) %>%
  summarize(total_at_home = sum(completely_home_device_count), total_devices = sum(device_count)) %>%
  mutate(percent_at_home = total_at_home*100/total_devices, percent_leaving_home = (100 - percent_at_home))

# include change in percent leaving home
scc_cases_sd_daily <- scc_cases_sd_daily %>%
  mutate(leaving_home_dif = percent_leaving_home - pre_case_growth_at_home_scc$percent_leaving_home[1],
         log_cases = log(cases))

# compute number of differences for stationarity
ndiffs(scc_cases_sd_daily$cases)
## [1] 2
ndiffs(scc_cases_sd_daily$log_cases[-1])
## [1] 2
ndiffs(scc_cases_sd_daily$leaving_home_dif)
## [1] 1
scc_test_big <- scc_cases_sd_daily %>%
  mutate(
    dlog_cases = c(NA,diff(log_cases)),
    d2log_cases = c(NA,NA,diff(log_cases, differences = 2)),
    dcases = c(NA,diff(cases)),
    d2cases = c(NA,NA,diff(dcases, differences = 2)),
    dleaving = c(NA,diff(leaving_home_dif)),
    d2leaving = c(NA,NA,diff(leaving_home_dif, differences = 2)),
    cases_mov = movavg(cases, 7, type = "s"),
    log_cases_mov = movavg(log_cases, 7, type = "s"),
    dlog_cases_mov = c(NA,diff(log_cases_mov)),
    d2log_cases_mov = c(NA,NA,diff(log_cases_mov, differences = 2)),
    dcases_mov = c(NA,diff(cases_mov)),
    d2cases_mov = c(NA,diff(dcases_mov)),
    leaving_mov = movavg(leaving_home_dif, 7, type = "s"),
    dleaving_mov = c(NA,diff(leaving_mov)),
    d2leaving_mov = c(NA,diff(dleaving_mov)),
    leaving4 = c(rep(NA,4), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-4)]),
    dleaving4 = c(NA,diff(leaving4)),
    d2leaving4 = c(NA,NA,diff(leaving4, differences = 2)),
    leaving3 = c(rep(NA,3), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-3)]),
    leaving3_mov = movavg(leaving3, 7, type = "s"),
    dleaving3_mov = c(NA,diff(leaving3_mov)),
    d2leaving3_mov = c(NA,NA,diff(leaving3_mov, differences = 2)),
    leaving4_mov = movavg(leaving4, 7, type = "s"),
    dleaving4_mov = c(NA,diff(leaving4_mov)),
    d2leaving4_mov = c(NA,NA,diff(leaving4_mov, differences = 2)),
    leaving5 = c(rep(NA,5), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-5)]),
    leaving5_mov = movavg(leaving5, 7, type = "s"),
    dleaving5_mov = c(NA,diff(leaving5_mov)),
    d2leaving5_mov = c(NA,NA,diff(leaving5_mov, differences = 2)),
    leaving6 = c(rep(NA,6), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-6)]),
    leaving6_mov = movavg(leaving6, 7, type = "s"),
    dleaving6_mov = c(NA,diff(leaving6_mov)),
    d2leaving6_mov = c(NA,NA,diff(leaving6_mov, differences = 2)),
    leaving7 = c(rep(NA,7), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-7)]),
    leaving7_mov = movavg(leaving7, 7, type = "s"),
    dleaving7_mov = c(NA,diff(leaving7_mov)),
    d2leaving7_mov = c(NA,NA,diff(leaving7_mov, differences = 2)),
    leaving8 = c(rep(NA,8), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-8)]),
    leaving8_mov = movavg(leaving8, 7, type = "s"),
    dleaving8_mov = c(NA,diff(leaving8_mov)),
    d2leaving8_mov = c(NA,NA,diff(leaving8_mov, differences = 2)),
    leaving9 = c(rep(NA,9), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-9)]),
    leaving9_mov = movavg(leaving9, 7, type = "s"),
    dleaving9_mov = c(NA,diff(leaving9_mov)),
    d2leaving9_mov = c(NA,NA,diff(leaving9_mov, differences = 2)),
    leaving10 = c(rep(NA,10), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-10)]),
    leaving10_mov = movavg(leaving10, 7, type = "s"),
    dleaving10_mov = c(NA,diff(leaving10_mov)),
    d2leaving10_mov = c(NA,NA,diff(leaving10_mov, differences = 2)),
    leaving11 = c(rep(NA,11), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-11)]),
    leaving11_mov = movavg(leaving11, 7, type = "s"),
    dleaving11_mov = c(NA,diff(leaving11_mov)),
    d2leaving11_mov = c(NA,NA,diff(leaving11_mov, differences = 2)),
    leaving12 = c(rep(NA,12), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-12)]),
    leaving12_mov = movavg(leaving12, 7, type = "s"),
    dleaving12_mov = c(NA,diff(leaving12_mov)),
    d2leaving12_mov = c(NA,NA,diff(leaving12_mov, differences = 2)),
    leaving13 = c(rep(NA,13), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-13)]),
    leaving13_mov = movavg(leaving13, 7, type = "s"),
    dleaving13_mov = c(NA,diff(leaving13_mov)),
    d2leaving13_mov = c(NA,NA,diff(leaving13_mov, differences = 2)),
    leaving14 = c(rep(NA,14), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-14)]),
    leaving14_mov = movavg(leaving14, 7, type = "s"),
    dleaving14_mov = c(NA,diff(leaving14_mov)),
    d2leaving14_mov = c(NA,NA,diff(leaving14_mov, differences = 2)),
    leaving18 = c(rep(NA,18), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-18)]),
    leaving18_mov = movavg(leaving14, 7, type = "s"),
    dleaving18_mov = c(NA,diff(leaving18_mov)),
    d2leaving18_mov = c(NA,NA,diff(leaving18_mov, differences = 2)),
    leaving21 = c(rep(NA,21), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-21)]),
    leaving21_mov = movavg(leaving21, 7, type = "s"),
    dleaving21_mov = c(NA,diff(leaving21_mov)),
    d2leaving21_mov = c(NA,NA,diff(leaving21_mov, differences = 2)),
    leaving28 = c(rep(NA,28), scc_cases_sd_daily$leaving_home_dif[1:(nrow(scc_cases_sd_daily)-28)]),
    leaving28_mov = movavg(leaving28, 7, type = "s"),
    dleaving28_mov = c(NA,diff(leaving28_mov)),
    d2leaving28_mov = c(NA,NA,diff(leaving28_mov, differences = 2)),
    past_cases = c(NA, scc_cases_sd_daily$cases[1:(nrow(scc_cases_sd_daily)-1)]), 
    cases_growth_daily = (dcases / past_cases)*100,
    cases_growth_daily_mov = movavg(cases_growth_daily, 7, type = "s")
  ) %>% 
  filter(date >= "2020-03-01")

ndiffs(scc_test_big$cases_growth_daily)
## [1] 1
scc_test_big_pre415 <- scc_test_big %>% filter(date <= "2020-04-15")

Lots of plots of movement and cases

Plots testing

# raw, no shifts
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - Growth Rate, No Lag")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - Cases, No Lag")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = d2cases_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Change in change in cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - Change in Cases, No Lag")

# log cases
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = log_cases_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Log of cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - log(cases), No Lag")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = dlog_cases_mov*100-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~(.+30)/100, name = "Change in log of cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County  - change in log of cases, no lag")

# raw, no shifts, pre april 15
scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - Case Growth Rate, no Lag, pre 4/15")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving_mov, color="Leaving home")) +
  geom_line(aes(y = dlog_cases_mov*100-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~./100+30, name = "Change in log of cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - Change in Log of Cases, no Lag, pre 4/15")

14 day lag

# 14 day shift
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - growth rate, 14 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - growth rate, 14 Day Lag, pre 4/15")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 14 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 14 Day Lag, pre 4/15")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = d2cases_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Change in change in cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in change of cases, 14 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = d2cases_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Change in change in cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in change of cases, 14 Day Lag, pre 4/15")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving14_mov, color="Leaving home")) +
  geom_line(aes(y = dlog_cases_mov*100-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~(.+30)/100, name = "Change in log of cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 14 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in log of cases, 14 day lag")

10 day lag

# 10 day lag
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving10_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 10 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County  - case growth rate, 10 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving10_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 10 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - case growth rate, 10 Day Lag, pre 4/15")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving10_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 10 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 10 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving10_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 10 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 10 Day Lag, pre 4/15")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving10_mov, color="Leaving home")) +
  geom_line(aes(y = dlog_cases_mov*100-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~(.+30)/100, name = "Change in log of cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 10 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in log of cases, 10 day lag")

18 day lag

# 18 day lag 
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving18_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 18 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - growth rate, 18 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving18_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 18 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - growth rate, 18 Day Lag, pre 4/15")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving18_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 18 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 18 Day Lag")

scc_test_big_pre415 %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving18_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 18 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 18 Day Lag, pre 4/15")

21 day lag

# 21 day lag??
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving21_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 21 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - growth rate, 21 Day Lag")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving21_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 21 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 21 Day Lag")

28 day lag…

# going wild with a 28 day lag
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving28_mov, color="Leaving home")) +
  geom_line(aes(y = cases_growth_daily_mov-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+30, name = "Daily case growth rate (%), 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 28 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - growth rate, 28 Day Lag")

scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving28_mov, color="Leaving home")) +
  geom_line(aes(y = dcases_mov-40, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~.*1+40, name = "Daily new cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 28 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in cases, 28 Day Lag")

Just log, 7 days

# trying just log plot with 7 day lag
scc_test_big %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = leaving7_mov, color="Leaving home")) +
  geom_line(aes(y = dlog_cases_mov*100-30, color = "Cases")) + 
  scale_y_continuous(sec.axis = sec_axis(~(.+30)/100, name = "Change in log of cases, 7 day moving average")) + 
  scale_colour_manual(values = c("red", "blue")) +
  labs(y = "Change in percent of devices leaving home 7 days ago relative to before, 7 day moving average", x = "Date", color = "Data", title = "Santa Clara County - change in log of cases, 7 day lag")

Testing ardlm

dleaving and d2logcases for 4 lags, moving average

testing_ardl4 = ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c()))
summary(testing_ardl4)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0210782 -0.0014117  0.0005584  0.0017166  0.0262660 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0011101  0.0009779  -1.135 0.260484    
## X.4         -0.0007008  0.0012865  -0.545 0.587818    
## Y.1         -0.0481037  0.1246589  -0.386 0.700844    
## Y.2          0.2334984  0.1275092   1.831 0.071650 .  
## Y.3          0.4307959  0.1059047   4.068 0.000131 ***
## Y.4          0.0513601  0.1156449   0.444 0.658432    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007328 on 65 degrees of freedom
## Multiple R-squared:  0.2442, Adjusted R-squared:  0.1861 
## F-statistic: 4.201 on 5 and 65 DF,  p-value: 0.002272
testing_ardl4_1 = ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2)))
summary(testing_ardl4_1)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0191964 -0.0025874  0.0008812  0.0016298  0.0301641 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0014823  0.0009735  -1.523 0.132628    
## X.4          0.0006929  0.0010556   0.656 0.513845    
## Y.1         -0.1230445  0.1198310  -1.027 0.308255    
## Y.3          0.3902563  0.1053953   3.703 0.000437 ***
## Y.4          0.0411679  0.1175523   0.350 0.727297    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007458 on 66 degrees of freedom
## Multiple R-squared:  0.2053, Adjusted R-squared:  0.1571 
## F-statistic: 4.261 on 4 and 66 DF,  p-value: 0.003964
testing_ardl4_2 = ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3)))
summary(testing_ardl4_2)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.025457 -0.003211  0.000477  0.002497  0.038036 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.002030   0.001050  -1.934   0.0574 .
## X.4          0.001812   0.001103   1.642   0.1052  
## Y.1         -0.088357   0.130304  -0.678   0.5001  
## Y.4         -0.035488   0.126215  -0.281   0.7794  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.008135 on 67 degrees of freedom
## Multiple R-squared:  0.04016,    Adjusted R-squared:  -0.002818 
## F-statistic: 0.9344 on 3 and 67 DF,  p-value: 0.429
testing_ardl4_3 = ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3,4)))
summary(testing_ardl4_3)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.025727 -0.003421  0.000533  0.002464  0.037708 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.002016   0.001041  -1.936    0.057 .
## X.4          0.001711   0.001036   1.652    0.103  
## Y.1         -0.099093   0.123738  -0.801    0.426  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.008079 on 68 degrees of freedom
## Multiple R-squared:  0.03903,    Adjusted R-squared:  0.01076 
## F-statistic: 1.381 on 2 and 68 DF,  p-value: 0.2583
testing_ardl4_4 = ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(), q=c(2,3,4)))
summary(testing_ardl4_4)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0132454 -0.0034096 -0.0004879  0.0035227  0.0193740 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.0014412  0.0008493  -1.697  0.09458 . 
## X.t         -0.0006620  0.0016091  -0.411  0.68215   
## X.1          0.0010576  0.0019109   0.553  0.58187   
## X.2          0.0050034  0.0015057   3.323  0.00148 **
## X.3          0.0027778  0.0016091   1.726  0.08911 . 
## X.4         -0.0036490  0.0013051  -2.796  0.00682 **
## Y.1         -0.2350380  0.1121475  -2.096  0.04006 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006421 on 64 degrees of freedom
## Multiple R-squared:  0.4288, Adjusted R-squared:  0.3752 
## F-statistic: 8.006 on 6 and 64 DF,  p-value: 1.847e-06
testing_ardl4_5 = ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3,4)))
summary(testing_ardl4_5)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.025727 -0.003421  0.000533  0.002464  0.037708 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.002016   0.001041  -1.936    0.057 .
## X.4          0.001711   0.001036   1.652    0.103  
## Y.1         -0.099093   0.123738  -0.801    0.426  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.008079 on 68 degrees of freedom
## Multiple R-squared:  0.03903,    Adjusted R-squared:  0.01076 
## F-statistic: 1.381 on 2 and 68 DF,  p-value: 0.2583
testing_ardl4_6= ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1,2), q=c(2,3,4)))
summary(testing_ardl4_6)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0143467 -0.0040547 -0.0003573  0.0026258  0.0311176 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0018496  0.0009207  -2.009   0.0486 *  
## X.3          0.0064126  0.0014294   4.486 2.92e-05 ***
## X.4         -0.0024872  0.0013087  -1.900   0.0617 .  
## Y.1         -0.3167641  0.1196011  -2.649   0.0101 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007138 on 67 degrees of freedom
## Multiple R-squared:  0.261,  Adjusted R-squared:  0.2279 
## F-statistic: 7.888 on 3 and 67 DF,  p-value: 0.0001395
testing_ardl4_7= ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0,1), q=c(2,3,4)))
summary(testing_ardl4_7)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0138842 -0.0032318 -0.0004221  0.0034649  0.0189539 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0014244  0.0008233  -1.730  0.08831 .  
## X.2          0.0052557  0.0012066   4.356 4.73e-05 ***
## X.3          0.0026571  0.0015345   1.732  0.08802 .  
## X.4         -0.0033944  0.0011806  -2.875  0.00543 ** 
## Y.1         -0.2232091  0.1083528  -2.060  0.04334 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006338 on 66 degrees of freedom
## Multiple R-squared:  0.426,  Adjusted R-squared:  0.3912 
## F-statistic: 12.25 on 4 and 66 DF,  p-value: 1.666e-07
testing_ardl4_7= ardlDlm(x = scc_test_big$dleaving_mov, y = scc_test_big$d2log_cases_mov, p = 4, q = 4, remove = list(p = c(0), q=c(2,3,4)))
summary(testing_ardl4_7)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.013724 -0.003323 -0.000673  0.003586  0.019547 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.0013885  0.0008342  -1.664  0.10084   
## X.1          0.0005304  0.0014084   0.377  0.70770   
## X.2          0.0049330  0.0014863   3.319  0.00148 **
## X.3          0.0026136  0.0015488   1.687  0.09630 . 
## X.4         -0.0034401  0.0011946  -2.880  0.00538 **
## Y.1         -0.2309722  0.1109951  -2.081  0.04139 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.00638 on 65 degrees of freedom
## Multiple R-squared:  0.4273, Adjusted R-squared:  0.3832 
## F-statistic: 9.698 on 5 and 65 DF,  p-value: 5.948e-07

dleaving and d2logcases for 4 lags, no moving average

testing_ardl4 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c()))
summary(testing_ardl4)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.108594 -0.011692  0.001610  0.009945  0.095411 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.006197   0.004251  -1.457  0.14979    
## X.4          0.002068   0.001347   1.535  0.12960    
## Y.1         -0.679648   0.121205  -5.607 4.52e-07 ***
## Y.2         -0.568728   0.133252  -4.268 6.54e-05 ***
## Y.3         -0.438087   0.138566  -3.162  0.00238 ** 
## Y.4         -0.190597   0.101444  -1.879  0.06475 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03489 on 65 degrees of freedom
## Multiple R-squared:  0.351,  Adjusted R-squared:  0.3011 
## F-statistic:  7.03 on 5 and 65 DF,  p-value: 2.634e-05
testing_ardl4_1 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2)))
summary(testing_ardl4_1)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.144764 -0.009669  0.001171  0.006960  0.153343 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.002752   0.004687  -0.587    0.559   
## X.4          0.001073   0.001490   0.720    0.474   
## Y.1         -0.385537   0.111962  -3.443    0.001 **
## Y.3         -0.037851   0.114549  -0.330    0.742   
## Y.4         -0.022127   0.104933  -0.211    0.834   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03918 on 66 degrees of freedom
## Multiple R-squared:  0.1691, Adjusted R-squared:  0.1187 
## F-statistic: 3.358 on 4 and 66 DF,  p-value: 0.01457
testing_ardl4_2 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3)))
summary(testing_ardl4_2)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.149466 -0.009126  0.002070  0.006699  0.150983 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0026514  0.0046461  -0.571 0.570124    
## X.4          0.0009747  0.0014499   0.672 0.503710    
## Y.1         -0.3824361  0.1108240  -3.451 0.000972 ***
## Y.4          0.0038177  0.0691463   0.055 0.956134    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03892 on 67 degrees of freedom
## Multiple R-squared:  0.1677, Adjusted R-squared:  0.1304 
## F-statistic:   4.5 on 3 and 67 DF,  p-value: 0.006162
testing_ardl4_3 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3,4)))
summary(testing_ardl4_3)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.149226 -0.009177  0.002010  0.006665  0.151077 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.002660   0.004609  -0.577  0.56578    
## X.4          0.001011   0.001280   0.790  0.43232    
## Y.1         -0.383527   0.108246  -3.543  0.00072 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03863 on 68 degrees of freedom
## Multiple R-squared:  0.1677, Adjusted R-squared:  0.1432 
## F-statistic: 6.849 on 2 and 68 DF,  p-value: 0.00195
testing_ardl4_4 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(), q=c(2,3,4)))
summary(testing_ardl4_4)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.115511 -0.016402 -0.004825  0.008661  0.125574 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0014619  0.0042703   0.342 0.733215    
## X.t          0.0053683  0.0013287   4.040 0.000146 ***
## X.1          0.0037749  0.0014952   2.525 0.014068 *  
## X.2          0.0017172  0.0013293   1.292 0.201080    
## X.3         -0.0003821  0.0012559  -0.304 0.761943    
## X.4          0.0004277  0.0012282   0.348 0.728798    
## Y.1         -0.4519569  0.1099907  -4.109 0.000115 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03442 on 64 degrees of freedom
## Multiple R-squared:  0.3782, Adjusted R-squared:  0.3199 
## F-statistic: 6.489 on 6 and 64 DF,  p-value: 2.207e-05
testing_ardl4_5 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3,4)))
summary(testing_ardl4_5)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.149226 -0.009177  0.002010  0.006665  0.151077 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.002660   0.004609  -0.577  0.56578    
## X.4          0.001011   0.001280   0.790  0.43232    
## Y.1         -0.383527   0.108246  -3.543  0.00072 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03863 on 68 degrees of freedom
## Multiple R-squared:  0.1677, Adjusted R-squared:  0.1432 
## F-statistic: 6.849 on 2 and 68 DF,  p-value: 0.00195
testing_ardl4_6= ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1,2), q=c(2,3,4)))
summary(testing_ardl4_6)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.149260 -0.009174  0.002009  0.006684  0.151095 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.655e-03  4.663e-03  -0.569  0.57106    
## X.3          1.638e-05  1.338e-03   0.012  0.99027    
## X.4          1.014e-03  1.304e-03   0.777  0.43970    
## Y.1         -3.836e-01  1.091e-01  -3.516  0.00079 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03892 on 67 degrees of freedom
## Multiple R-squared:  0.1677, Adjusted R-squared:  0.1304 
## F-statistic: 4.499 on 3 and 67 DF,  p-value: 0.00617
testing_ardl4_7= ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0,1), q=c(2,3,4)))
summary(testing_ardl4_7)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.149096 -0.009543  0.001530  0.006562  0.151765 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.568e-03  4.755e-03  -0.540 0.590999    
## X.2          1.743e-04  1.466e-03   0.119 0.905735    
## X.3          3.586e-05  1.358e-03   0.026 0.979005    
## X.4          1.054e-03  1.358e-03   0.777 0.440161    
## Y.1         -3.844e-01  1.101e-01  -3.490 0.000865 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03921 on 66 degrees of freedom
## Multiple R-squared:  0.1679, Adjusted R-squared:  0.1174 
## F-statistic: 3.328 on 4 and 66 DF,  p-value: 0.01521
testing_ardl4_7= ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 4, q = 4, remove = list(p = c(0), q=c(2,3,4)))
summary(testing_ardl4_7)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.143175 -0.013751 -0.000918  0.011317  0.153910 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0010639  0.0046959  -0.227 0.821484    
## X.1          0.0034469  0.0016596   2.077 0.041768 *  
## X.2          0.0006313  0.0014472   0.436 0.664135    
## X.3          0.0007070  0.0013635   0.519 0.605841    
## X.4          0.0003697  0.0013652   0.271 0.787381    
## Y.1         -0.5020844  0.1214902  -4.133 0.000105 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03826 on 65 degrees of freedom
## Multiple R-squared:  0.2196, Adjusted R-squared:  0.1596 
## F-statistic: 3.659 on 5 and 65 DF,  p-value: 0.005597

6 lags

testing_ardl6 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 6, q = 1, remove = list(p = c(0,1,2,3,4,5), q=c()))
summary(testing_ardl6)
## 
## Time series regression with "ts" data:
## Start = 7, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.138345 -0.013401  0.004258  0.013870  0.144748 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.006730   0.004282  -1.572   0.1208    
## X.6         -0.002287   0.001204  -1.900   0.0618 .  
## Y.1         -0.553200   0.106851  -5.177  2.3e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03518 on 66 degrees of freedom
## Multiple R-squared:  0.2915, Adjusted R-squared:   0.27 
## F-statistic: 13.58 on 2 and 66 DF,  p-value: 1.151e-05
testing_ardl6_1 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 6, q = 1, remove = list(p = c(0,1,2,3,4), q=c()))
summary(testing_ardl6_1)
## 
## Time series regression with "ts" data:
## Start = 7, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12561 -0.01297  0.00249  0.01357  0.14795 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.007181   0.004224  -1.700   0.0939 .  
## X.5         -0.002117   0.001209  -1.751   0.0847 .  
## X.6         -0.002518   0.001192  -2.112   0.0386 *  
## Y.1         -0.525211   0.106426  -4.935 5.87e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03464 on 65 degrees of freedom
## Multiple R-squared:  0.3234, Adjusted R-squared:  0.2922 
## F-statistic: 10.36 on 3 and 65 DF,  p-value: 1.167e-05
testing_ardl6_2 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 6, q = 1, remove = list(p = c(0,1,2,3), q=c()))
summary(testing_ardl6_2)
## 
## Time series regression with "ts" data:
## Start = 7, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.126507 -0.012434  0.003494  0.014056  0.147071 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0068783  0.0043102  -1.596   0.1155    
## X.4          0.0005629  0.0013229   0.426   0.6719    
## X.5         -0.0020670  0.0012226  -1.691   0.0958 .  
## X.6         -0.0023652  0.0012527  -1.888   0.0636 .  
## Y.1         -0.5176719  0.1085583  -4.769 1.11e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03486 on 64 degrees of freedom
## Multiple R-squared:  0.3253, Adjusted R-squared:  0.2831 
## F-statistic: 7.714 on 4 and 64 DF,  p-value: 3.879e-05
testing_ardl6_3 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 6, q = 1, remove = list(p = c(0,1,2), q=c()))
summary(testing_ardl6_3)
## 
## Time series regression with "ts" data:
## Start = 7, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.124346 -0.010249  0.006063  0.014241  0.143227 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0078112  0.0043238  -1.807   0.0756 .  
## X.3         -0.0019079  0.0013274  -1.437   0.1556    
## X.4          0.0003701  0.0013188   0.281   0.7799    
## X.5         -0.0024086  0.0012356  -1.949   0.0557 .  
## X.6         -0.0020294  0.0012642  -1.605   0.1134    
## Y.1         -0.5179871  0.1076656  -4.811 9.74e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03458 on 63 degrees of freedom
## Multiple R-squared:  0.3467, Adjusted R-squared:  0.2949 
## F-statistic: 6.688 on 5 and 63 DF,  p-value: 4.696e-05
testing_ardl6_4 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 6, q = 1, remove = list(p = c(0,1), q=c()))
summary(testing_ardl6_4)
## 
## Time series regression with "ts" data:
## Start = 7, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.124608 -0.010532  0.006042  0.014207  0.142690 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0078768  0.0044209  -1.782   0.0797 .  
## X.2         -0.0001192  0.0013503  -0.088   0.9299    
## X.3         -0.0019226  0.0013483  -1.426   0.1589    
## X.4          0.0003437  0.0013624   0.252   0.8017    
## X.5         -0.0023847  0.0012747  -1.871   0.0661 .  
## X.6         -0.0020286  0.0012743  -1.592   0.1165    
## Y.1         -0.5181515  0.1085396  -4.774 1.14e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03485 on 62 degrees of freedom
## Multiple R-squared:  0.3468, Adjusted R-squared:  0.2836 
## F-statistic: 5.487 on 6 and 62 DF,  p-value: 0.0001318
testing_ardl6_5 = ardlDlm(x = scc_test_big$dleaving, y = scc_test_big$d2log_cases, p = 6, q = 1, remove = list(p = c(0), q=c()))
summary(testing_ardl6_5)
## 
## Time series regression with "ts" data:
## Start = 7, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.124036 -0.014554  0.001443  0.014035  0.143189 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0064468  0.0043898  -1.469   0.1471    
## X.1          0.0029219  0.0015137   1.930   0.0582 .  
## X.2          0.0002299  0.0013339   0.172   0.8637    
## X.3         -0.0014191  0.0013451  -1.055   0.2956    
## X.4         -0.0001975  0.0013626  -0.145   0.8852    
## X.5         -0.0022017  0.0012512  -1.760   0.0835 .  
## X.6         -0.0015787  0.0012687  -1.244   0.2181    
## Y.1         -0.6116081  0.1167423  -5.239 2.12e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03411 on 61 degrees of freedom
## Multiple R-squared:  0.3844, Adjusted R-squared:  0.3138 
## F-statistic: 5.442 on 7 and 61 DF,  p-value: 6.926e-05

leaving and dcases for 4 lags

# keeping only 4 x lags, removing different y lags
testing_ardl4 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c()))
summary(testing_ardl4)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.879 -10.171   0.751   6.640  42.219 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.46602    4.29030   1.973 0.052717 .  
## X.4          0.12422    0.16660   0.746 0.458587    
## Y.1          0.49848    0.12341   4.039 0.000144 ***
## Y.2          0.18318    0.13764   1.331 0.187876    
## Y.3          0.08556    0.13788   0.621 0.537064    
## Y.4          0.06895    0.12327   0.559 0.577825    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.38 on 65 degrees of freedom
## Multiple R-squared:  0.5633, Adjusted R-squared:  0.5298 
## F-statistic: 16.77 on 5 and 65 DF,  p-value: 1.302e-10
testing_ardl4_1 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2)))
summary(testing_ardl4_1)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.422  -9.052  -0.986   6.962  41.272 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.84217    4.30592   2.053    0.044 *  
## X.4          0.11460    0.16742   0.685    0.496    
## Y.1          0.57353    0.11041   5.195 2.15e-06 ***
## Y.3          0.14885    0.13017   1.143    0.257    
## Y.4          0.09642    0.12223   0.789    0.433    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.47 on 66 degrees of freedom
## Multiple R-squared:  0.5514, Adjusted R-squared:  0.5243 
## F-statistic: 20.28 on 4 and 66 DF,  p-value: 6.212e-11
testing_ardl4_2 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3)))
summary(testing_ardl4_2)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.285 -11.347  -2.101   6.693  43.230 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.17858    4.30571   2.132   0.0367 *  
## X.4          0.09499    0.16692   0.569   0.5712    
## Y.1          0.62296    0.10182   6.118 5.53e-08 ***
## Y.4          0.17307    0.10245   1.689   0.0958 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.5 on 67 degrees of freedom
## Multiple R-squared:  0.5426, Adjusted R-squared:  0.5221 
## F-statistic: 26.49 on 3 and 67 DF,  p-value: 2.059e-11
testing_ardl4_3 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3,4)))
summary(testing_ardl4_3)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.147  -8.903   0.100   6.314  51.611 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.98698    4.33697   2.303   0.0244 *  
## X.4          0.01763    0.16268   0.108   0.9140    
## Y.1          0.71922    0.08553   8.409 3.96e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.7 on 68 degrees of freedom
## Multiple R-squared:  0.5231, Adjusted R-squared:  0.509 
## F-statistic: 37.29 on 2 and 68 DF,  p-value: 1.168e-11
# all x lags, only 1 y lag
testing_ardl4_4 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(), q=c(2,3,4)))
summary(testing_ardl4_4)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.240  -9.202  -0.652   7.991  47.500 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.32048    4.81150   2.145   0.0358 *  
## X.t          1.07698    0.59973   1.796   0.0772 .  
## X.1         -1.20822    0.74111  -1.630   0.1080    
## X.2         -0.01992    0.70538  -0.028   0.9776    
## X.3         -0.59968    0.73693  -0.814   0.4188    
## X.4          0.79540    0.52516   1.515   0.1348    
## Y.1          0.72983    0.08685   8.403 6.28e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.56 on 64 degrees of freedom
## Multiple R-squared:  0.5591, Adjusted R-squared:  0.5178 
## F-statistic: 13.53 on 6 and 64 DF,  p-value: 7.627e-10
# removing x lags with only 1 y lag
testing_ardl4_5 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1,2,3), q=c(2,3,4)))
summary(testing_ardl4_5)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.147  -8.903   0.100   6.314  51.611 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.98698    4.33697   2.303   0.0244 *  
## X.4          0.01763    0.16268   0.108   0.9140    
## Y.1          0.71922    0.08553   8.409 3.96e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.7 on 68 degrees of freedom
## Multiple R-squared:  0.5231, Adjusted R-squared:  0.509 
## F-statistic: 37.29 on 2 and 68 DF,  p-value: 1.168e-11
testing_ardl4_6= ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1,2), q=c(2,3,4)))
summary(testing_ardl4_6)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.562  -9.924  -0.161   7.384  51.324 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.49569    4.36412   2.176   0.0331 *  
## X.3         -0.51485    0.51233  -1.005   0.3185    
## X.4          0.49779    0.50474   0.986   0.3276    
## Y.1          0.70739    0.08633   8.194 1.08e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.69 on 67 degrees of freedom
## Multiple R-squared:  0.5302, Adjusted R-squared:  0.5091 
## F-statistic:  25.2 on 3 and 67 DF,  p-value: 4.989e-11
testing_ardl4_7= ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0,1), q=c(2,3,4)))
summary(testing_ardl4_7)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.768 -10.755   0.559   7.286  50.315 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.75386    4.48529   1.952   0.0552 .  
## X.2         -0.41619    0.54730  -0.760   0.4497    
## X.3         -0.14876    0.70420  -0.211   0.8333    
## X.4          0.51672    0.50694   1.019   0.3118    
## Y.1          0.70415    0.08671   8.121 1.61e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.74 on 66 degrees of freedom
## Multiple R-squared:  0.5342, Adjusted R-squared:  0.506 
## F-statistic: 18.93 on 4 and 66 DF,  p-value: 2.088e-10
# looking at different y lags included on their own with all x lags
testing_ardl4_7= ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0), q=c(2,3,4)))
summary(testing_ardl4_7)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.154 -10.949   0.748   7.768  50.001 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.92920    4.70203   1.686   0.0965 .  
## X.1         -0.35578    0.57879  -0.615   0.5409    
## X.2         -0.13593    0.71434  -0.190   0.8497    
## X.3         -0.16636    0.70812  -0.235   0.8150    
## X.4          0.58157    0.52016   1.118   0.2677    
## Y.1          0.70417    0.08712   8.083 2.08e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.81 on 65 degrees of freedom
## Multiple R-squared:  0.5369, Adjusted R-squared:  0.5013 
## F-statistic: 15.07 on 5 and 65 DF,  p-value: 8.202e-10
testing_ardl4_8= ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0), q=c(1,2,3)))
summary(testing_ardl4_8)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.680 -11.561  -3.558   8.674  42.927 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 18.16022    5.43298   3.343  0.00138 ** 
## X.1          0.25293    0.72785   0.348  0.72933    
## X.2         -0.00311    0.88561  -0.004  0.99721    
## X.3         -0.51994    0.87323  -0.595  0.55363    
## X.4          0.34027    0.65018   0.523  0.60251    
## Y.4          0.52383    0.11615   4.510 2.78e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.3 on 65 degrees of freedom
## Multiple R-squared:  0.2928, Adjusted R-squared:  0.2384 
## F-statistic: 5.382 on 5 and 65 DF,  p-value: 0.0003351
testing_ardl4_9= ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0), q=c(1,2,4)))
summary(testing_ardl4_9)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.008 -11.162  -1.654   8.059  47.072 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  15.8355     5.1871   3.053  0.00328 ** 
## X.1           0.5940     0.6958   0.854  0.39640    
## X.2          -0.3610     0.8320  -0.434  0.66577    
## X.3          -0.9508     0.8269  -1.150  0.25440    
## X.4           0.7947     0.6054   1.313  0.19389    
## Y.3           0.5948     0.1069   5.562  5.4e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.26 on 65 degrees of freedom
## Multiple R-squared:  0.3709, Adjusted R-squared:  0.3225 
## F-statistic: 7.664 on 5 and 65 DF,  p-value: 1.032e-05
testing_ardl4_10= ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dcases, p = 4, q = 4, remove = list(p = c(0), q=c(1,3,4)))
summary(testing_ardl4_10)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.398  -9.452  -3.152   6.970  37.324 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 12.31430    4.99741   2.464   0.0164 *  
## X.1          0.41075    0.64480   0.637   0.5264    
## X.2         -0.88803    0.78640  -1.129   0.2630    
## X.3         -0.21414    0.77593  -0.276   0.7834    
## X.4          0.69396    0.56946   1.219   0.2274    
## Y.2          0.63891    0.09684   6.597 8.87e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.23 on 65 degrees of freedom
## Multiple R-squared:  0.4439, Adjusted R-squared:  0.4011 
## F-statistic: 10.38 on 5 and 65 DF,  p-value: 2.408e-07

leaving and dlog_cases for up to 5 lags

# all y lags, only x lag of 5
testing_ardl5 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,4), q=c()))
summary(testing_ardl5)
## 
## Time series regression with "ts" data:
## Start = 6, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.108165 -0.006456 -0.000218  0.007674  0.112385 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0052952  0.0283289   0.187  0.85233    
## X.5          0.0002029  0.0008899   0.228  0.82035    
## Y.1          0.2948595  0.1108391   2.660  0.00989 ** 
## Y.2          0.2168115  0.1125628   1.926  0.05860 .  
## Y.3         -0.0618693  0.1100965  -0.562  0.57614    
## Y.4          0.4044801  0.0920751   4.393 4.37e-05 ***
## Y.5          0.0186078  0.0878808   0.212  0.83299    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03129 on 63 degrees of freedom
## Multiple R-squared:  0.8445, Adjusted R-squared:  0.8297 
## F-statistic: 57.01 on 6 and 63 DF,  p-value: < 2.2e-16
# all x lags, only y lag 1
testing_ardl5_1 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(), q=c(2,3,4,5)))
summary(testing_ardl5_1)
## 
## Time series regression with "ts" data:
## Start = 6, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.113344 -0.016694 -0.003508  0.018559  0.085327 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.307e-01  2.789e-02   4.685 1.58e-05 ***
## X.t          4.901e-03  1.329e-03   3.687 0.000479 ***
## X.1         -1.928e-03  1.764e-03  -1.093 0.278660    
## X.2          9.720e-05  1.619e-03   0.060 0.952311    
## X.3         -2.010e-03  1.628e-03  -1.234 0.221718    
## X.4          3.053e-03  1.637e-03   1.865 0.066936 .  
## X.5          6.047e-05  1.213e-03   0.050 0.960396    
## Y.1          4.210e-01  1.159e-01   3.632 0.000572 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03179 on 62 degrees of freedom
## Multiple R-squared:  0.8421, Adjusted R-squared:  0.8242 
## F-statistic: 47.23 on 7 and 62 DF,  p-value: < 2.2e-16
# all x lags, only y lag 2
testing_ardl5_2 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(), q=c(1,3,4,5)))
summary(testing_ardl5_2)
## 
## Time series regression with "ts" data:
## Start = 6, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.141281 -0.014170 -0.003306  0.012675  0.069068 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.127736   0.024900   5.130 3.08e-06 ***
## X.t          0.004515   0.001274   3.544 0.000757 ***
## X.1          0.001248   0.001586   0.787 0.434225    
## X.2         -0.002163   0.001654  -1.308 0.195630    
## X.3         -0.002021   0.001575  -1.283 0.204126    
## X.4          0.001881   0.001576   1.194 0.236968    
## X.5          0.000671   0.001131   0.593 0.555309    
## Y.2          0.454660   0.106670   4.262 7.01e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03078 on 62 degrees of freedom
## Multiple R-squared:  0.8519, Adjusted R-squared:  0.8352 
## F-statistic: 50.94 on 7 and 62 DF,  p-value: < 2.2e-16
# adding in x lags
testing_ardl5_3 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0), q=c(1,3,4,5)))
summary(testing_ardl5_3)
## 
## Time series regression with "ts" data:
## Start = 6, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.150291 -0.015545 -0.004782  0.012848  0.083301 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1188892  0.0269508   4.411 4.09e-05 ***
## X.1          0.0045552  0.0013949   3.266  0.00177 ** 
## X.2         -0.0024366  0.0017970  -1.356  0.17996    
## X.3         -0.0002658  0.0016261  -0.163  0.87067    
## X.4          0.0014825  0.0017096   0.867  0.38914    
## X.5          0.0004425  0.0012288   0.360  0.71995    
## Y.2          0.4278442  0.1157496   3.696  0.00046 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03349 on 63 degrees of freedom
## Multiple R-squared:  0.8219, Adjusted R-squared:  0.8049 
## F-statistic: 48.45 on 6 and 63 DF,  p-value: < 2.2e-16
testing_ardl5_4 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1), q=c(1,3,4,5)))
summary(testing_ardl5_4)
## 
## Time series regression with "ts" data:
## Start = 6, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.15618 -0.01633 -0.00478  0.01459  0.08576 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1216601  0.0288998   4.210 8.14e-05 ***
## X.2          0.0013979  0.0014594   0.958  0.34174    
## X.3         -0.0006372  0.0017403  -0.366  0.71548    
## X.4          0.0034289  0.0017191   1.995  0.05035 .  
## X.5         -0.0003827  0.0012901  -0.297  0.76771    
## Y.2          0.3586927  0.1220864   2.938  0.00459 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03593 on 64 degrees of freedom
## Multiple R-squared:  0.7917, Adjusted R-squared:  0.7755 
## F-statistic: 48.66 on 5 and 64 DF,  p-value: < 2.2e-16
testing_ardl5_5 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2), q=c(1,3,4,5)))
summary(testing_ardl5_5)
## 
## Time series regression with "ts" data:
## Start = 6, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.155993 -0.014162 -0.004484  0.013365  0.092244 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1116142  0.0269125   4.147 9.95e-05 ***
## X.3          0.0003372  0.0014112   0.239  0.81191    
## X.4          0.0033626  0.0017166   1.959  0.05442 .  
## X.5         -0.0002151  0.0012774  -0.168  0.86681    
## Y.2          0.3906141  0.1173752   3.328  0.00144 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03591 on 65 degrees of freedom
## Multiple R-squared:  0.7887, Adjusted R-squared:  0.7757 
## F-statistic: 60.67 on 4 and 65 DF,  p-value: < 2.2e-16
testing_ardl5_6 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3), q=c(1,3,4,5)))
summary(testing_ardl5_6)
## 
## Time series regression with "ts" data:
## Start = 6, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.155160 -0.013831 -0.004086  0.013223  0.094281 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.109047   0.024498   4.451 3.37e-05 ***
## X.4          0.003630   0.001292   2.810 0.006518 ** 
## X.5         -0.000228   0.001267  -0.180 0.857751    
## Y.2          0.399896   0.109967   3.636 0.000542 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03565 on 66 degrees of freedom
## Multiple R-squared:  0.7886, Adjusted R-squared:  0.779 
## F-statistic: 82.05 on 3 and 66 DF,  p-value: < 2.2e-16
testing_ardl5_7 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,4), q=c(1,3,4,5)))
summary(testing_ardl5_7)
## 
## Time series regression with "ts" data:
## Start = 6, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.156345 -0.012802 -0.005804  0.014654  0.128303 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0856371  0.0241939   3.540 0.000734 ***
## X.5         0.0026401  0.0007884   3.349 0.001335 ** 
## Y.2         0.4897097  0.1105002   4.432 3.55e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03744 on 67 degrees of freedom
## Multiple R-squared:  0.7633, Adjusted R-squared:  0.7562 
## F-statistic:   108 on 2 and 67 DF,  p-value: < 2.2e-16
# trying different single y lags with only one x lag (4)
testing_ardl5_9 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,5), q=c(2,3,4,5)))
summary(testing_ardl5_9)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.128827 -0.014932 -0.007603  0.010000  0.119612 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1018230  0.0233030   4.370 4.36e-05 ***
## X.4         0.0031757  0.0007615   4.170 8.81e-05 ***
## Y.1         0.4701150  0.1064001   4.418 3.66e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03624 on 68 degrees of freedom
## Multiple R-squared:  0.7941, Adjusted R-squared:  0.788 
## F-statistic: 131.1 on 2 and 68 DF,  p-value: < 2.2e-16
testing_ardl5_10 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,5), q=c(1,3,4,5)))
summary(testing_ardl5_10)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.159248 -0.014228 -0.005846  0.012120  0.128545 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1261810  0.0251299   5.021 3.94e-06 ***
## X.4         0.0039389  0.0008157   4.829 8.14e-06 ***
## Y.2         0.3462031  0.1142744   3.030  0.00346 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03859 on 68 degrees of freedom
## Multiple R-squared:  0.7665, Adjusted R-squared:  0.7596 
## F-statistic: 111.6 on 2 and 68 DF,  p-value: < 2.2e-16
testing_ardl5_11 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,5), q=c(1,2,4,5)))
summary(testing_ardl5_11)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.146026 -0.014964 -0.005749  0.011137  0.116249 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1333212  0.0238245   5.596 4.26e-07 ***
## X.4         0.0041698  0.0007751   5.380 9.93e-07 ***
## Y.3         0.2956037  0.1018290   2.903  0.00498 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03878 on 68 degrees of freedom
## Multiple R-squared:  0.7642, Adjusted R-squared:  0.7573 
## F-statistic: 110.2 on 2 and 68 DF,  p-value: < 2.2e-16
testing_ardl5_12 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,5), q=c(1,2,3,5)))
summary(testing_ardl5_12)
## 
## Time series regression with "ts" data:
## Start = 5, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.125928 -0.016057 -0.007117  0.013790  0.156061 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1387167  0.0233280   5.946 1.05e-07 ***
## X.4         0.0043383  0.0007603   5.706 2.75e-07 ***
## Y.4         0.2720220  0.0997752   2.726  0.00814 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03903 on 68 degrees of freedom
## Multiple R-squared:  0.7611, Adjusted R-squared:  0.7541 
## F-statistic: 108.3 on 2 and 68 DF,  p-value: < 2.2e-16
testing_ardl5_13 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,1,2,3,5), q=c(1,2,3,4)))
summary(testing_ardl5_13)
## 
## Time series regression with "ts" data:
## Start = 6, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.112131 -0.017279 -0.006861  0.012955  0.124604 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1532479  0.0189906   8.070 1.80e-11 ***
## X.4         0.0048166  0.0006197   7.773 6.18e-11 ***
## Y.5         0.1865965  0.0809860   2.304   0.0243 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03745 on 67 degrees of freedom
## Multiple R-squared:  0.7631, Adjusted R-squared:  0.756 
## F-statistic: 107.9 on 2 and 67 DF,  p-value: < 2.2e-16
# compare to just having different x values on their own with single y lag
testing_ardl5_14 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,4,2,3,5), q=c(1,2,3,4)))
summary(testing_ardl5_14)
## 
## Time series regression with "ts" data:
## Start = 6, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.100868 -0.017362 -0.007354  0.019674  0.113587 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1576405  0.0164726   9.570 3.69e-14 ***
## X.1         0.0050528  0.0005436   9.295 1.14e-13 ***
## Y.5         0.2855592  0.0630975   4.526 2.53e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03413 on 67 degrees of freedom
## Multiple R-squared:  0.8032, Adjusted R-squared:  0.7973 
## F-statistic: 136.7 on 2 and 67 DF,  p-value: < 2.2e-16
testing_ardl5_15 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,4,1,3,5), q=c(1,2,3,4)))
summary(testing_ardl5_15)
## 
## Time series regression with "ts" data:
## Start = 6, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.114858 -0.018717 -0.007667  0.019610  0.115542 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1482808  0.0181065   8.189 1.10e-11 ***
## X.2         0.0046910  0.0005938   7.899 3.65e-11 ***
## Y.5         0.2712425  0.0722627   3.754 0.000367 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03716 on 67 degrees of freedom
## Multiple R-squared:  0.7667, Adjusted R-squared:  0.7598 
## F-statistic: 110.1 on 2 and 67 DF,  p-value: < 2.2e-16
testing_ardl5_16 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,4,1,2,5), q=c(1,2,3,4)))
summary(testing_ardl5_16)
## 
## Time series regression with "ts" data:
## Start = 6, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.116827 -0.018435 -0.007668  0.008932  0.118335 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1437949  0.0180533   7.965 2.78e-11 ***
## X.3         0.0045355  0.0005909   7.675 9.27e-11 ***
## Y.5         0.2565209  0.0751181   3.415  0.00109 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03767 on 67 degrees of freedom
## Multiple R-squared:  0.7603, Adjusted R-squared:  0.7531 
## F-statistic: 106.2 on 2 and 67 DF,  p-value: < 2.2e-16
testing_ardl5_17 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 5, q = 5, remove = list(p = c(0,4,1,2,3), q=c(1,2,3,4)))
summary(testing_ardl5_17)
## 
## Time series regression with "ts" data:
## Start = 6, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.112037 -0.021885 -0.009319  0.013639  0.167204 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1552352  0.0253111   6.133  5.2e-08 ***
## X.5         0.0047860  0.0008219   5.823  1.8e-07 ***
## Y.5         0.1354347  0.1083249   1.250    0.216    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04208 on 67 degrees of freedom
## Multiple R-squared:  0.7009, Adjusted R-squared:  0.6919 
## F-statistic: 78.49 on 2 and 67 DF,  p-value: < 2.2e-16

Compare with up to 10 lags in x, 5 in y

# all y, all x
testing_ardl10 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(), q=c()))
summary(testing_ardl10)
## 
## Time series regression with "ts" data:
## Start = 11, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.045122 -0.008367 -0.001981  0.009459  0.038077 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.538e-02  2.831e-02   2.663 0.010509 *  
## X.t          2.981e-03  9.597e-04   3.106 0.003176 ** 
## X.1          8.409e-04  1.102e-03   0.763 0.449151    
## X.2          9.294e-05  1.082e-03   0.086 0.931907    
## X.3         -1.030e-03  1.116e-03  -0.923 0.360576    
## X.4         -1.004e-03  1.102e-03  -0.912 0.366544    
## X.5         -5.883e-04  1.161e-03  -0.507 0.614625    
## X.6         -4.386e-04  1.013e-03  -0.433 0.666868    
## X.7          2.568e-03  1.035e-03   2.482 0.016635 *  
## X.8         -1.772e-04  1.079e-03  -0.164 0.870274    
## X.9         -1.240e-03  1.016e-03  -1.220 0.228265    
## X.10         5.826e-04  8.453e-04   0.689 0.493973    
## Y.1          3.158e-01  1.379e-01   2.289 0.026522 *  
## Y.2          1.435e-01  1.184e-01   1.212 0.231598    
## Y.3         -1.080e-01  1.003e-01  -1.076 0.287120    
## Y.4          3.430e-01  9.358e-02   3.666 0.000616 ***
## Y.5          1.626e-02  8.803e-02   0.185 0.854268    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01761 on 48 degrees of freedom
## Multiple R-squared:  0.9431, Adjusted R-squared:  0.9241 
## F-statistic: 49.69 on 16 and 48 DF,  p-value: < 2.2e-16
# all x, only one y
testing_ardl10_1 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(), q=c(1,2,3,4)))
summary(testing_ardl10_1)
## 
## Time series regression with "ts" data:
## Start = 11, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.037816 -0.013624 -0.002767  0.009762  0.076898 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.719e-01  2.258e-02   7.614 5.21e-10 ***
## X.t          2.544e-03  1.163e-03   2.188   0.0332 *  
## X.1          1.453e-03  1.299e-03   1.119   0.2685    
## X.2          1.288e-03  1.295e-03   0.994   0.3247    
## X.3         -1.489e-03  1.297e-03  -1.147   0.2565    
## X.4          8.663e-04  1.258e-03   0.688   0.4943    
## X.5         -1.900e-03  1.428e-03  -1.330   0.1894    
## X.6         -5.141e-05  1.238e-03  -0.042   0.9670    
## X.7          1.213e-03  1.269e-03   0.956   0.3437    
## X.8          1.680e-03  1.243e-03   1.351   0.1825    
## X.9         -1.255e-03  1.253e-03  -1.002   0.3212    
## X.10         1.354e-03  9.629e-04   1.406   0.1656    
## Y.5          2.444e-01  8.815e-02   2.773   0.0077 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0223 on 52 degrees of freedom
## Multiple R-squared:  0.9011, Adjusted R-squared:  0.8783 
## F-statistic:  39.5 on 12 and 52 DF,  p-value: < 2.2e-16
# testing single x values with only one y
testing_ardl10_2 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(0,1,2,3,4,5,6,7,8,9), q=c(1,2,3,4)))
summary(testing_ardl10_2)
## 
## Time series regression with "ts" data:
## Start = 11, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.07240 -0.01008 -0.00305  0.01205  0.16198 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0886163  0.0192214   4.610 2.06e-05 ***
## X.10        0.0028150  0.0006289   4.476 3.32e-05 ***
## Y.5         0.3329674  0.0939228   3.545 0.000753 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03214 on 62 degrees of freedom
## Multiple R-squared:  0.7551, Adjusted R-squared:  0.7472 
## F-statistic:  95.6 on 2 and 62 DF,  p-value: < 2.2e-16
testing_ardl10_3 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(0,1,2,3,4,5,6,7,8,10), q=c(1,2,3,4)))
summary(testing_ardl10_3)
## 
## Time series regression with "ts" data:
## Start = 10, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.076992 -0.012253 -0.003747  0.011746  0.155252 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0985636  0.0205377   4.799 1.02e-05 ***
## X.9         0.0031174  0.0006673   4.672 1.62e-05 ***
## Y.5         0.3039371  0.0956797   3.177  0.00231 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03163 on 63 degrees of freedom
## Multiple R-squared:  0.7757, Adjusted R-squared:  0.7686 
## F-statistic: 108.9 on 2 and 63 DF,  p-value: < 2.2e-16
testing_ardl10_4 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(0,1,2,3,4,5,6,7,9,10), q=c(1,2,3,4)))
summary(testing_ardl10_4)
## 
## Time series regression with "ts" data:
## Start = 9, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.076567 -0.013837 -0.003512  0.007458  0.140990 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1221159  0.0219795   5.556 5.73e-07 ***
## X.8         0.0038902  0.0007086   5.490 7.38e-07 ***
## Y.5         0.2491341  0.1004681   2.480   0.0158 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03328 on 64 degrees of freedom
## Multiple R-squared:  0.7843, Adjusted R-squared:  0.7775 
## F-statistic: 116.3 on 2 and 64 DF,  p-value: < 2.2e-16
testing_ardl10_5 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(0,1,2,3,4,5,6,8,9,10), q=c(1,2,3,4)))
summary(testing_ardl10_5)
## 
## Time series regression with "ts" data:
## Start = 8, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.062329 -0.015881 -0.004658  0.011522  0.119788 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1400491  0.0204781   6.839 3.34e-09 ***
## X.7         0.0044620  0.0006569   6.793 4.02e-09 ***
## Y.5         0.1769083  0.0931879   1.898   0.0621 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03064 on 65 degrees of freedom
## Multiple R-squared:  0.8146, Adjusted R-squared:  0.8089 
## F-statistic: 142.8 on 2 and 65 DF,  p-value: < 2.2e-16
testing_ardl10_6 = ardlDlm(x = scc_test_big$leaving_home_dif, y = scc_test_big$dlog_cases, p = 10, q = 5, remove = list(p = c(0,1,2,3,4,5,7,8,9,10), q=c(1,2,3,4)))
summary(testing_ardl10_6)
## 
## Time series regression with "ts" data:
## Start = 7, End = 75
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.068284 -0.013166 -0.005343  0.007768  0.160932 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.1090246  0.0212746   5.125 2.80e-06 ***
## X.6         0.0034374  0.0006868   5.005 4.41e-06 ***
## Y.5         0.3178040  0.0909899   3.493 0.000858 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0341 on 66 degrees of freedom
## Multiple R-squared:   0.78,  Adjusted R-squared:  0.7733 
## F-statistic:   117 on 2 and 66 DF,  p-value: < 2.2e-16

Zip code data

Get the zip code case data for SCC - this doesn’t work, running into issues with Rselenium and scrolling

# remDr$navigate("https://app.powerbigov.us/view?r=eyJrIjoiZTg2MTlhMWQtZWE5OC00ZDI3LWE4NjAtMTU3YWYwZDRlOTNmIiwidCI6IjBhYzMyMDJmLWMzZTktNGY1Ni04MzBkLTAxN2QwOWQxNmIzZiJ9&pageName=ReportSectiona1d27339c9acd841e1fa")
# 
# 
# webElem <- remDr$findElements(using = "class", value = "pivotTableCellWrap")
# 
# website_raw <-
#   1:length(webElem) %>%
#   map(function(x){
#     webElem[[x]]$getElementAttribute("innerText") %>% as.character()
#   }) %>%
#   unlist() %>%
#   as.data.frame() %>%
#   mutate(convertedVals = as.character(.))
# 
# zips_scc <- zipcode_scc_raw$convertedVals[4:(length(zipcode_scc_raw$convertedVals)/3+2)]
# counts_scc <- zipcode_scc_raw$convertedVals[(length(zipcode_scc_raw$convertedVals)/3+3):(2*length(zipcode_scc_raw$convertedVals)/3+2)]
# perc_pop_scc <- zipcode_scc_raw$convertedVals[(2*length(zipcode_scc_raw$convertedVals)/3+3):length(zipcode_scc_raw$convertedVals)]
# 
# zipcode_scc <- data.frame(zips_scc, counts_scc, perc_pop_scc)

Zip code social distancing data and case data from SMC (old case data)

zctas_94 <- 
  zctas(cb = F, starts_with = "94") %>% 
  st_transform('+proj=longlat +datum=WGS84') %>%
  dplyr::select(ZCTA5CE10, geometry)

smc_bg_zctas <- smc_blockgroups %>% 
  st_centroid() %>%
  st_join(zctas_94) %>% 
  dplyr::select(GEOID, ZCTA5CE10) %>%
  rename(blockgroup = GEOID, zipcode = ZCTA5CE10)

sd_date_cutoff <- as.Date("2020-05-10")
pre_sd_date_cutoff <- as.Date("2020-03-01")
shelter_date <- as.Date("2020-03-16")

smc_sd_zip <- bay_sd %>%
  filter(origin_census_block_group %in% smc_blockgroups$GEOID) %>%
  filter(date <= sd_date_cutoff & date >= shelter_date)  %>% 
  left_join(smc_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>% 
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>% 
  mutate(
    percent_at_home = total_at_home_zip*100/total_devices, 
    percent_leaving_home = (100 - percent_at_home)
  )

# get block group averages leaving home before shelter in place/COVID-19
smc_pre_sd_leaving_avg <- bay_sd %>%
  filter(origin_census_block_group %in% smc_blockgroups$GEOID) %>%
  filter(date <= pre_sd_date_cutoff)  %>% 
  left_join(smc_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>% 
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>% 
  mutate(
    percent_at_home_pre = total_at_home_zip*100/total_devices, 
    percent_leaving_home_pre = (100 - percent_at_home_pre)
  )

# combine
smc_sd_zip <- smc_sd_zip %>% 
  left_join(smc_pre_sd_leaving_avg %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre)

# load case data, stored from what was released through 4/30
smc_zip_cases <- read_csv("smc_cases_4.29.2020.csv")

# join case data and social distancing data
smc_sd_cases_zip <- left_join(smc_zip_cases, smc_sd_zip)

smc_sd_cases_zip[smc_sd_cases_zip == "<10"] <- "10"

smc_sd_cases_zip <- smc_sd_cases_zip %>% mutate(cases = as.numeric(cases))

smc_sd_cases_zip %>% ggplot(
  aes(x = dif_percent_leaving, y = cases)) + geom_point() + geom_smooth(method=lm)

testing_zip_cases_sd_abs <- lm(cases ~ dif_percent_leaving, smc_sd_cases_zip, na.action = na.omit)

summary(testing_zip_cases_sd_abs)
## 
## Call:
## lm(formula = cases ~ dif_percent_leaving, data = smc_sd_cases_zip, 
##     na.action = na.omit)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -31.40 -28.69 -12.14  15.01 117.76 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)          36.7619    34.9110   1.053    0.302
## dif_percent_leaving  -0.1736     1.3192  -0.132    0.896
## 
## Residual standard error: 36.82 on 25 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.0006919,  Adjusted R-squared:  -0.03928 
## F-statistic: 0.01731 on 1 and 25 DF,  p-value: 0.8964

Compare with demographic variables (also normalize by population). The chart below shows the difference in percent leaving home from shelter in place through 5/10 averaged over that time period relative to Jan+Feb 2020 and the cases normalized by population, with each point a zip code in San Mateo County.

# obtain the saved census data 
setwd("~/Documents/2020 Spring Quarter/CEE 218Z")
acs_vars = readRDS("censusData2018_acs_acs5.rds")
setwd("~/Documents/2020 Spring Quarter/CEE 218Z/covid19")

# define a function for pulling census data
pullCensus <- function(variableToPull, county) {
    regionString <- paste0("state:06+county:", county)
    censusData <- getCensus(
      name = "acs/acs5",
      vintage = 2018,
      region = "block group:*", 
      regionin = regionString,
      vars = variableToPull
    ) %>%
    mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
      select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME"))
  
  return(censusData)
}

# get population data
smc_fips <- fips("CA", "San Mateo") %>% substr(3,5)

smc_pop_zip <- pullCensus("B01003_001E", smc_fips) %>% 
  left_join(smc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_pop_zip = sum(B01003_001E))

smc_sd_cases_zip <- smc_sd_cases_zip %>% left_join(smc_pop_zip) %>%
  mutate(cases_by_pop = cases / total_pop_zip)

smc_sd_cases_zip %>% ggplot(
  aes(x = dif_percent_leaving, y = cases_by_pop)) + geom_point() + ggtitle("San Mateo County") + geom_smooth(method=lm)

testing_zip_cases_sd <- lm(cases_by_pop ~ dif_percent_leaving, smc_sd_cases_zip, na.action = na.omit)

summary(testing_zip_cases_sd)
## 
## Call:
## lm(formula = cases_by_pop ~ dif_percent_leaving, data = smc_sd_cases_zip, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0033257 -0.0009319 -0.0005497  0.0000597  0.0153501 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)         0.0100169  0.0033124   3.024   0.0057 **
## dif_percent_leaving 0.0002906  0.0001252   2.322   0.0287 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.003493 on 25 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.1774, Adjusted R-squared:  0.1445 
## F-statistic: 5.392 on 1 and 25 DF,  p-value: 0.02867
# try absolute percents leaving
smc_sd_cases_zip %>% ggplot(
  aes(x = percent_leaving_home, y = cases_by_pop)) + geom_point() + ggtitle("San Mateo County") + geom_smooth(method=lm)

testing_zip_cases_sd_abs <- lm(cases_by_pop ~ percent_leaving_home, smc_sd_cases_zip, na.action = na.omit)

summary(testing_zip_cases_sd_abs)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_leaving_home, data = smc_sd_cases_zip, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0033897 -0.0013731 -0.0008541  0.0003338  0.0154641 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)          -0.0175050  0.0095295  -1.837   0.0781 .
## percent_leaving_home  0.0003882  0.0001846   2.103   0.0457 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.00355 on 25 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.1503, Adjusted R-squared:  0.1163 
## F-statistic: 4.423 on 1 and 25 DF,  p-value: 0.04568

What if we look at the change in leaving home just in the first week after shelter in place? Inspired by the recent articles on how interventions in the immediate time frame might have been most important.

First let’s just see how the change in leaving home compares across zip codes.

# preserve dates 
smc_sd_zip_by_date <- bay_sd %>%
  filter(origin_census_block_group %in% smc_blockgroups$GEOID) %>%
  filter(date <= sd_date_cutoff & date >= shelter_date)  %>% 
  left_join(smc_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>% 
  group_by(zipcode, date) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>% 
  mutate(
    percent_at_home = total_at_home_zip*100/total_devices, 
    percent_leaving_home = (100 - percent_at_home)
  )

# combine with pre shelter in place data
smc_sd_zip_by_date <- smc_sd_zip_by_date %>% 
  left_join(smc_pre_sd_leaving_avg %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre)

smc_sd_zip_by_date %>% ggplot(
  aes(x = date)) +
    geom_line(aes(y = dif_percent_leaving, color=zipcode))

That’s a mess. But there is definitely variability.

# just look at the first week after the shelter in place order
smc_zip_week_post <- smc_sd_zip_by_date %>%
  dplyr::select(-c(percent_at_home, percent_leaving_home, dif_percent_leaving)) %>%
  filter(date <= shelter_date + 7) %>%
  ungroup() %>%
  group_by(zipcode) %>%
  summarize(total_at_home_week = sum(total_at_home_zip), total_devices_week = sum(total_devices)) %>%
  left_join(smc_sd_cases_zip %>% dplyr::select(zipcode, cases, cases_by_pop, percent_leaving_home_pre)) %>%
  mutate(percent_at_home_week = total_at_home_week*100/total_devices_week, 
    percent_leaving_home_week = (100 - percent_at_home_week),
    dif_percent_leaving_week = percent_leaving_home_week - percent_leaving_home_pre)

# plot
smc_zip_week_post %>% ggplot(
  aes(x = dif_percent_leaving_week, y = cases_by_pop)) + geom_point() + ggtitle("San Mateo County, difference in percent leaving for the week after shelter in place")

testing_zip_cases_sd <- lm(cases_by_pop ~ dif_percent_leaving_week, smc_zip_week_post, na.action = na.omit)

summary(testing_zip_cases_sd)
## 
## Call:
## lm(formula = cases_by_pop ~ dif_percent_leaving_week, data = smc_zip_week_post, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0050029 -0.0011430 -0.0003392 -0.0000695  0.0146226 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)   
## (Intercept)              0.0082126  0.0027149   3.025  0.00569 **
## dif_percent_leaving_week 0.0002549  0.0001170   2.179  0.03898 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.003531 on 25 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1596, Adjusted R-squared:  0.126 
## F-statistic: 4.747 on 1 and 25 DF,  p-value: 0.03898

Try adding in San Francisco county.

# geometries 
sf_blockgroups <- 
  block_groups("CA","San Francisco", cb=F, progress_bar=F) %>% 
  st_transform('+proj=longlat +datum=WGS84')

sf_bg_zctas <- sf_blockgroups %>% 
  st_centroid() %>%
  st_join(zctas_94) %>% 
  dplyr::select(GEOID, ZCTA5CE10) %>%
  rename(blockgroup = GEOID, zipcode = ZCTA5CE10)

# get cases from LA Times
sf_place_cases_max <- 
  read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-place-totals.csv") %>%
  filter(county == 'San Francisco') %>%
  filter(date == max(date))

# get population data
sf_fips <- fips("CA", "San Francisco") %>% substr(3,5)

sf_pop_zip <- pullCensus("B01003_001E", sf_fips) %>% 
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_pop_zip = sum(B01003_001E))

sf_cases_zip <- sf_place_cases_max %>% left_join(sf_pop_zip, by = c("place" = "zipcode")) %>%
  mutate(cases_by_pop = confirmed_cases / total_pop_zip) %>%
  rename(zipcode = place)

# social distancing data
sf_sd_zip <- bay_sd %>%
  filter(origin_census_block_group %in% sf_blockgroups$GEOID) %>%
  filter(date <= sd_date_cutoff & date >= shelter_date)  %>% 
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>% 
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>% 
  mutate(
    percent_at_home = total_at_home_zip*100/total_devices, 
    percent_leaving_home = (100 - percent_at_home)
  )

# get block group averages leaving home before shelter in place/COVID-19
sf_pre_sd_leaving_avg <- bay_sd %>%
  filter(origin_census_block_group %in% sf_blockgroups$GEOID) %>%
  filter(date <= pre_sd_date_cutoff)  %>% 
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>% 
  group_by(zipcode) %>%
  summarize(total_at_home_zip = sum(completely_home_device_count), total_devices = sum(device_count)) %>% 
  mutate(
    percent_at_home_pre = total_at_home_zip*100/total_devices, 
    percent_leaving_home_pre = (100 - percent_at_home_pre)
  )

# combine
sf_sd_zip <- sf_sd_zip %>% 
  left_join(sf_pre_sd_leaving_avg %>% dplyr::select(zipcode, percent_leaving_home_pre)) %>%
  mutate(dif_percent_leaving = percent_leaving_home - percent_leaving_home_pre)

sf_sd_cases_zip <- left_join(sf_cases_zip, sf_sd_zip)

sf_sd_cases_zip %>% ggplot(
  aes(x = dif_percent_leaving, y = cases_by_pop)) + geom_point() + ggtitle("San Francisco County") + geom_smooth(method=lm)

testing_zip_cases_sd_sf <- lm(cases_by_pop ~ dif_percent_leaving, sf_sd_cases_zip, na.action = na.omit)

summary(testing_zip_cases_sd_sf)
## 
## Call:
## lm(formula = cases_by_pop ~ dif_percent_leaving, data = sf_sd_cases_zip, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0019481 -0.0010927 -0.0007924  0.0010647  0.0031503 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)
## (Intercept)         2.405e-03  1.726e-03   1.393    0.177
## dif_percent_leaving 1.278e-05  7.558e-05   0.169    0.867
## 
## Residual standard error: 0.001639 on 23 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.001241,   Adjusted R-squared:  -0.04218 
## F-statistic: 0.02859 on 1 and 23 DF,  p-value: 0.8672
# absolute cases
sf_sd_cases_zip %>% ggplot(
  aes(x = dif_percent_leaving, y = confirmed_cases)) + geom_point() + ggtitle("San Francisco County") + geom_smooth(method=lm)

# absolute percents
sf_sd_cases_zip %>% ggplot(
  aes(x = percent_leaving_home, y = cases_by_pop)) + geom_point() + ggtitle("San Francisco County") + geom_smooth(method=lm) 

testing_zip_cases_sd_sf_abs <- lm(cases_by_pop ~ percent_leaving_home, sf_sd_cases_zip, na.action = na.omit)

summary(testing_zip_cases_sd_sf_abs)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_leaving_home, data = sf_sd_cases_zip, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0018811 -0.0010749 -0.0006076  0.0006400  0.0030092 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           7.105e-03  3.854e-03   1.844   0.0782 .
## percent_leaving_home -9.529e-05  7.340e-05  -1.298   0.2071  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001583 on 23 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.06827,    Adjusted R-squared:  0.02776 
## F-statistic: 1.685 on 1 and 23 DF,  p-value: 0.2071

Combine both SMC and SF

smc_sf_combined <- rbind(sf_sd_cases_zip %>% dplyr::select(dif_percent_leaving, cases_by_pop), smc_sd_cases_zip %>% dplyr::select(dif_percent_leaving, cases_by_pop))

smc_sf_combined %>% ggplot(
  aes(x = dif_percent_leaving, y = cases_by_pop)) + geom_point() + ggtitle("San Francisco County and San Mateo County") + geom_smooth(method=lm)

smc_sf_combined_lm <- lm(cases_by_pop ~ dif_percent_leaving, smc_sf_combined, na.action = na.omit)

summary(smc_sf_combined_lm)
## 
## Call:
## lm(formula = cases_by_pop ~ dif_percent_leaving, data = smc_sf_combined, 
##     na.action = na.omit)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0036623 -0.0012405 -0.0005102  0.0004377  0.0164077 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)         6.026e-03  1.872e-03   3.219  0.00226 **
## dif_percent_leaving 1.534e-04  7.553e-05   2.031  0.04764 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002833 on 50 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.07618,    Adjusted R-squared:  0.0577 
## F-statistic: 4.123 on 1 and 50 DF,  p-value: 0.04764